Check working directory

getwd()
## [1] "/Users/alexg/R files/hair_cortisol/skew-normal FINAL"

Load packages

library(readxl)
library(psych)
library(dlookr)
## Registered S3 methods overwritten by 'dlookr':
##   method          from  
##   plot.transform  scales
##   print.transform scales
## 
## Attaching package: 'dlookr'
## The following object is masked from 'package:psych':
## 
##     describe
## The following object is masked from 'package:base':
## 
##     transform
library(vtable)
## Loading required package: kableExtra
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:psych':
## 
##     cs
## The following object is masked from 'package:stats':
## 
##     ar
library(rethinking)
## Loading required package: cmdstanr
## This is cmdstanr version 0.8.0
## - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
## - CmdStan path: /Users/alexg/.cmdstan/cmdstan-2.36.0
## - CmdStan version: 2.36.0
## Loading required package: posterior
## This is posterior version 1.6.1
## 
## Attaching package: 'posterior'
## The following object is masked from 'package:dlookr':
## 
##     entropy
## The following objects are masked from 'package:stats':
## 
##     mad, sd, var
## The following objects are masked from 'package:base':
## 
##     %in%, match
## Loading required package: parallel
## rethinking (Version 2.42)
## 
## Attaching package: 'rethinking'
## The following objects are masked from 'package:brms':
## 
##     LOO, stancode, WAIC
## The following objects are masked from 'package:psych':
## 
##     logistic, logit, sim
## The following object is masked from 'package:stats':
## 
##     rstudent
library(loo)
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## 
## Attaching package: 'loo'
## The following object is masked from 'package:rethinking':
## 
##     compare
library(priorsense)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()      masks psych::%+%()
## ✖ ggplot2::alpha()    masks psych::alpha()
## ✖ tidyr::expand()     masks reshape::expand()
## ✖ tidyr::extract()    masks dlookr::extract()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ purrr::map()        masks rethinking::map()
## ✖ reshape::rename()   masks dplyr::rename()
## ✖ lubridate::stamp()  masks reshape::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vioplot)
## Loading required package: sm
## Package 'sm', version 2.2-6.0: type help(sm) for summary information
## 
## Attaching package: 'sm'
## 
## The following object is masked from 'package:dlookr':
## 
##     binning
## 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(bayesplot)
## This is bayesplot version 1.12.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## 
## Attaching package: 'bayesplot'
## 
## The following object is masked from 'package:posterior':
## 
##     rhat
## 
## The following object is masked from 'package:brms':
## 
##     rhat
library(bayestestR)

Load data

# Read the Excel sheet with explicit column types; "skip" drops unused
# columns (sheet positions 10, 12, 13, 15), leaving 11 columns in df
df <- read_xlsx("hair_cort_dog_all.xlsx", col_types = c("text", "text",  
                               "text", "text", "text", "text",
                               "text", "numeric","text", "skip",
                               "numeric", "skip", "skip", 
                               "numeric", "skip"))
# read_xlsx returns a tibble; convert to a plain data.frame for the
# base-R subsetting and plotting used throughout this script
df <- as.data.frame(df)

INITIAL DATA PLOTTING AND EXPLORATION

Check characteristics of df

dim(df) # will tell you how many rows and columns the dataset has
## [1] 73 11
class(df) # will tell you what data structure has the dataset been assigned
## [1] "data.frame"

Explore the dataset to understand its structure.

head(df)
##   number   group visit season breed_group coat_colour    sex age comorbidity
## 1     c1 stopped    v0 winter         ret        dark   Male  43         yes
## 2     c2 stopped    v0 autumn         mix        dark   Male 105         yes
## 3     c3 stopped    v0 spring        ckcs         mix Female 117         yes
## 4     c4 stopped    v0 summer         ret        dark Female 108         yes
## 5     c5 stopped    v0 summer         ret        dark Female 110         yes
## 6     c6 stopped    v0 winter         mix       light Female 120         yes
##   fat_percent cortisol
## 1    52.21393 4.924220
## 2    38.52059 7.304202
## 3    46.94916 1.590000
## 4    44.46813 0.861570
## 5    39.59363 6.217317
## 6          NA 4.426785

1. Get summary stats for numeric data

a. all data

# Keep only the numeric columns (age, fat_percent, cortisol)
numeric_df <- Filter(is.numeric, df)
# NOTE: dlookr masks psych::describe (see the library() messages above),
# so the unqualified call — and the tibble output below — is dlookr's
# describe, not psych's. Qualify the namespace to make that explicit.
dlookr::describe(numeric_df)
## # A tibble: 3 × 26
##   described_variables     n    na  mean    sd se_mean   IQR skewness kurtosis
##   <chr>               <int> <int> <dbl> <dbl>   <dbl> <dbl>    <dbl>    <dbl>
## 1 age                    73     0 95.8  35.6     4.16 44      -0.104 -0.00589
## 2 fat_percent            55    18 40.5   7.82    1.05  7.82   -0.294  1.12   
## 3 cortisol               73     0  8.11 16.5     1.93  5.43    4.05  18.7    
## # ℹ 17 more variables: p00 <dbl>, p01 <dbl>, p05 <dbl>, p10 <dbl>, p20 <dbl>,
## #   p25 <dbl>, p30 <dbl>, p40 <dbl>, p50 <dbl>, p60 <dbl>, p70 <dbl>,
## #   p75 <dbl>, p80 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, p100 <dbl>

b. completed v0

# Subset in one step: completed-group dogs measured at visit v0
df_compv0 <- df[df$group == "completed" & df$visit == "v0", ]
sumtable(df_compv0, digits = 5, add.median = TRUE)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 50 Pctl. 75 Max
group 21
… completed 21 100%
visit 21
… v0 21 100%
season 21
… autumn 3 14.286%
… spring 4 19.048%
… summer 8 38.095%
… winter 6 28.571%
breed_group 21
… ckcs 1 4.762%
… mix 5 23.81%
… other 8 38.095%
… pug 2 9.524%
… ret 5 23.81%
coat_colour 21
… dark 8 38.095%
… light 10 47.619%
… mix 3 14.286%
sex 21
… Female 13 61.905%
… Male 8 38.095%
age 21 91.095 36.12 29 67 96 109 165
comorbidity 21
… no 7 33.333%
… yes 14 66.667%
fat_percent 17 44.529 6.3901 37.038 39.783 43.414 47.635 61.197
cortisol 21 7.649 14.289 0.6871 1.6768 2.1517 6.8455 59.037

c. completed v1

# Subset in one step: completed-group dogs measured at visit v1
df_compv1 <- df[df$group == "completed" & df$visit == "v1", ]
sumtable(df_compv1, digits = 5, add.median = TRUE)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 50 Pctl. 75 Max
group 21
… completed 21 100%
visit 21
… v1 21 100%
season 21
… autumn 8 38.095%
… spring 1 4.762%
… summer 9 42.857%
… winter 3 14.286%
breed_group 21
… ckcs 1 4.762%
… mix 5 23.81%
… other 8 38.095%
… pug 2 9.524%
… ret 5 23.81%
coat_colour 21
… dark 9 42.857%
… light 10 47.619%
… mix 2 9.524%
sex 21
… Female 13 61.905%
… Male 8 38.095%
age 21 109.24 35.474 42 94 105 129 182
comorbidity 21
… no 7 33.333%
… yes 14 66.667%
fat_percent 12 32.619 8.7784 17.862 27.287 32.854 37.529 49.87
cortisol 21 3.6267 3.7377 0.41409 1.2354 1.7837 5.5537 14.845

d. stopped

# Subset to the dogs in the "stopped" group
df_stop <- df[which(df$group == "stopped"), ]
sumtable(df_stop, digits = 5, add.median = TRUE)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 50 Pctl. 75 Max
group 31
… stopped 31 100%
visit 31
… v0 31 100%
season 31
… autumn 10 32.258%
… spring 9 29.032%
… summer 5 16.129%
… winter 7 22.581%
breed_group 31
… ckcs 5 16.129%
… mix 6 19.355%
… other 10 32.258%
… pug 3 9.677%
… ret 7 22.581%
coat_colour 31
… dark 13 41.935%
… light 8 25.806%
… mix 10 32.258%
sex 31
… Female 17 54.839%
… Male 14 45.161%
age 31 90 33.948 16 71 98 110 155
comorbidity 31
… no 1 3.226%
… yes 30 96.774%
fat_percent 26 41.46 5.5041 30.86 37.773 41.875 44.317 54.385
cortisol 31 11.457 21.911 0.46298 1.7821 2.8454 7.2434 104.62

2. Check normality of all numeric variables

a. graphical assessment

plot_normality(numeric_df)

b. shapiro-wilk test

apply(numeric_df, 2, shapiro.test)
## $age
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.97361, p-value = 0.1288
## 
## 
## $fat_percent
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.97956, p-value = 0.4692
## 
## 
## $cortisol
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.46269, p-value = 6.756e-15

c. repeat Q-Q plots with transformed data

i. log(cortisol)

# Q-Q plot of raw cortisol: right skew shows as points bowing away from the line
qqnorm(df$cortisol)
qqline(df$cortisol, col = "red")

# Q-Q plot after log transform: points should lie much closer to the line
qqnorm(log(df$cortisol))
qqline(log(df$cortisol), col = "red")

ii. Shapiro test for log cortisol

shapiro.test(log(df$cortisol))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(df$cortisol)
## W = 0.94725, p-value = 0.004126

3. Check data numerically

summary(df$cortisol)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.4141   1.4119   2.3331   8.1089   6.8455 104.6172
summary(log(df$cortisol))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8817  0.3449  0.8472  1.1816  1.9236  4.6503

a. log-transform cortisol

df$lgCort <- log(df$cortisol)
summary(df$lgCort)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8817  0.3449  0.8472  1.1816  1.9236  4.6503

i. visualise

hist(df$lgCort)

b. Create simple category name for breed and convert to factor

# Copy breed_group under a shorter name and make it a factor in one step,
# with "mix" as the reference level
df$breed <- factor(df$breed_group, levels = c("mix", "ckcs", "pug", "ret", "other"))
head(df$breed)
## [1] ret  mix  ckcs ret  ret  mix 
## Levels: mix ckcs pug ret other

c. Make light hair colour the reference category

df$coat_colour <- factor(df$coat_colour, levels = c("light", "mix", "dark"), ordered = FALSE)
head(df$coat_colour)
## [1] dark  dark  mix   dark  dark  light
## Levels: light mix dark

4. Generate data summary

all data

sumtable(df)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
group 73
… completed 42 58%
… stopped 31 42%
visit 73
… v0 52 71%
… v1 21 29%
season 73
… autumn 21 29%
… spring 14 19%
… summer 22 30%
… winter 16 22%
breed_group 73
… ckcs 7 10%
… mix 16 22%
… other 26 36%
… pug 7 10%
… ret 17 23%
coat_colour 73
… light 28 38%
… mix 15 21%
… dark 30 41%
sex 73
… Female 43 59%
… Male 30 41%
age 73 96 36 16 73 117 182
comorbidity 73
… no 15 21%
… yes 58 79%
fat_percent 55 40 7.8 18 37 45 61
cortisol 73 8.1 16 0.41 1.4 6.8 105
lgCort 73 1.2 1.2 -0.88 0.34 1.9 4.7
breed 73
… mix 16 22%
… ckcs 7 10%
… pug 7 10%
… ret 17 23%
… other 26 36%

5. Visualise associations

a. Between Cortisol and sex with a violin plot (vioplot package)

par(mfrow = c(1,1))
vioplot(cortisol ~ sex, col = "firebrick",
        data = df)

b. Between lgCortisol and sex with a violin plot (vioplot package)

par(mfrow = c(1,1))
vioplot(lgCort ~ sex, col = "lemonchiffon",
        data = df)

c. Between lgCortisol and breed

i. with a violin plot (vioplot package)

par(mfrow = c(1,1))
vioplot(lgCort ~ breed, col = "firebrick",
        data = df)

ii. with stripchart

stripchart(lgCort ~ breed, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20)

d. Between Cortisol and comorbidity with a violin plot (vioplot package)

par(mfrow = c(1,1))
vioplot(cortisol ~ comorbidity, col = "steelblue",
        data = df)

e. Between log(cortisol) and comorbidity with a violin plot (vioplot package)

par(mfrow = c(1,1))
vioplot(lgCort ~ comorbidity, col = "steelblue",
        data = df)

f. Between cortisol and age with a dotplot

plot(cortisol ~ age, col = "steelblue3", data = df, pch = 20)

g. Between log(cortisol) and age with a dotplot

plot(lgCort ~ age, col = "steelblue3", data = df, pch = 20)

h. Between cortisol and body fat with a dotplot

plot(cortisol ~ fat_percent, col = "steelblue3", data = df, pch = 20)

i. Between log(cortisol) and body fat with a dotplot

plot(lgCort ~ fat_percent, col = "steelblue3", data = df, pch = 20)

corr.test(df$lgCort, df$fat_percent)
## Call:corr.test(x = df$lgCort, y = df$fat_percent)
## Correlation matrix 
## [1] 0.06
## Sample Size 
## [1] 55
## These are the unadjusted probability values.
##   The probability values  adjusted for multiple tests are in the p.adj object. 
## [1] 0.65
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option

STANDARDISE DATA FOR MODELLING

1. Standardise body fat

df$sFat <- standardize(df$fat_percent)
summary(df$sFat)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -2.89165 -0.43568  0.04814  0.00000  0.56366  2.64873       18

a. visualise standardised body fat

hist(df$sFat)

2. Standardise Age

df$sAge <- standardize(df$age)
summary(df$sAge)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.2444 -0.6422  0.1448  0.0000  0.5945  2.4215

3. Standardise lg(cortisol)

# Standardise log(cortisol). Bug fix: the original used df$lgC, which only
# worked because data.frame `$` does partial matching and "lgC" uniquely
# prefixes "lgCort" — a silent footgun (it would return NULL on a tibble,
# or match a different column if one named e.g. "lgC2" were added).
df$slgCort <- standardize(df$lgCort)
summary(df$slgCort)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.7079 -0.6925 -0.2768  0.0000  0.6142  2.8713

a. visualise standardised lgCort

hist(df$slgCort)

4. Create dataset only containing complete data

df2 <- na.omit(df)

MODEL FOR THE EFFECT OF sAGE ON HAIR CORTISOL

1. Model code

model code

model <- brm(slgCort ~ sAge + breed + (1 | visit), family = skew_normal(), data = df2)

2. Check what priors need to be set

default_prior(slgCort ~ sAge + breed + (1 | visit),
                   family = skew_normal(),
                   data = df2)
##                    prior     class       coef group resp dpar nlpar lb ub
##             normal(0, 4)     alpha                                       
##                   (flat)         b                                       
##                   (flat)         b  breedckcs                            
##                   (flat)         b breedother                            
##                   (flat)         b   breedpug                            
##                   (flat)         b   breedret                            
##                   (flat)         b       sAge                            
##  student_t(3, -0.1, 2.5) Intercept                                       
##     student_t(3, 0, 2.5)        sd                                   0   
##     student_t(3, 0, 2.5)        sd            visit                  0   
##     student_t(3, 0, 2.5)        sd  Intercept visit                  0   
##     student_t(3, 0, 2.5)     sigma                                   0   
##        source
##       default
##       default
##  (vectorized)
##  (vectorized)
##  (vectorized)
##  (vectorized)
##  (vectorized)
##       default
##       default
##  (vectorized)
##  (vectorized)
##       default

Published information about associations between hair cortisol and age

Not much evidence of an effect of age on hair cortisol in the literature; for example, no effect was found in one study (Bowland GB. Front. Vet. Sci 2020; 7:565346. doi: 10.3389/fvets.2020.565346)

No published data about effects on breed, but this is plausible (as effect of hair colour) However, unclear as to which breeds will differ and which way.

Therefore, use regularising priors for all but keep it neutral and relatively broad.

3. Set priors

# Set individual priors
# Outcome is standardised (mean 0, sd 1), so centre the intercept at 0
prior_int <- set_prior("normal(0, 0.5)", class = "Intercept")
# Weakly-informative prior for the residual scale
prior_sig <- set_prior("exponential(1)", class = "sigma")
# Regularising but broad prior shared by all slopes (the breed dummies)
prior_b <- set_prior("normal(0, 1)", class = "b")
# Tighter prior for age: little published evidence of an age effect
prior_b_sAge <- set_prior("normal(0, 0.5)", class = "b", coef = "sAge")
# Group-level (visit) SD; brms bounds class "sd" below at 0 (see default_prior)
prior_sd <- set_prior("normal(0, 1)", class = "sd")
# Shape parameter of the skew-normal, centred on positive skew
prior_alpha <- set_prior("normal(4, 2)", class = "alpha")

# Combine priors into list
priors <- c(prior_int, prior_sig, prior_b, prior_b_sAge, prior_sd, prior_alpha)

4. Plot prior

a. Prior for intercept

x <- seq(-3, 3, length.out = 100)
y <- dnorm(x, mean = 0, sd = 0.5)
plot(y ~ x, type = "l")

b. Prior for sigma

x <- seq(0, 3, length.out = 100)
y <- dexp(x, rate = 1)
plot(y ~ x, type = "l")

c. Prior for beta of sAge

x <- seq(-2, 2, length.out = 100)
y <- dnorm(x, mean = 0, sd = 0.5)
plot(y ~ x, type = "l")

Prior for beta of breed

x <- seq(-2, 2, length.out = 100)
y <- dnorm(x, mean = 0, sd = 1)
plot(y ~ x, type = "l")

5. Run model

Increased adapt_delta above the default of 0.8 (0.99999 here, with a smaller stepsize and larger max_treedepth), as there were divergent transitions

# NOTE(review): brm() also takes its own `seed` argument, which is the
# documented way to make Stan sampling reproducible — consider using it.
set.seed(666)
# Final model: fitted on the full df (73 observations per the summary
# below), not the complete-case df2 used earlier — confirm intentional.
model <- brm(slgCort ~ sAge + breed + (1 | visit),
                   family = skew_normal(),
                   prior = priors,
                   data = df,
                  # NOTE(review): adapt_delta = 0.99999 with stepsize = 0.001 is
                  # extreme; such settings tend to hide, rather than fix, the
                  # geometry problems behind divergent transitions — confirm
                  # a reparameterisation was considered first.
                  control=list(adapt_delta=0.99999, stepsize = 0.001, max_treedepth = 15),
                   iter = 8000, warmup = 2000,
                   save_pars = save_pars(all =TRUE),
                   sample_prior = TRUE)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 8.5e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.85 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 8000 [  0%]  (Warmup)
## Chain 1: Iteration:  800 / 8000 [ 10%]  (Warmup)
## Chain 1: Iteration: 1600 / 8000 [ 20%]  (Warmup)
## Chain 1: Iteration: 2001 / 8000 [ 25%]  (Sampling)
## Chain 1: Iteration: 2800 / 8000 [ 35%]  (Sampling)
## Chain 1: Iteration: 3600 / 8000 [ 45%]  (Sampling)
## Chain 1: Iteration: 4400 / 8000 [ 55%]  (Sampling)
## Chain 1: Iteration: 5200 / 8000 [ 65%]  (Sampling)
## Chain 1: Iteration: 6000 / 8000 [ 75%]  (Sampling)
## Chain 1: Iteration: 6800 / 8000 [ 85%]  (Sampling)
## Chain 1: Iteration: 7600 / 8000 [ 95%]  (Sampling)
## Chain 1: Iteration: 8000 / 8000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 3.621 seconds (Warm-up)
## Chain 1:                5.883 seconds (Sampling)
## Chain 1:                9.504 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 1.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 8000 [  0%]  (Warmup)
## Chain 2: Iteration:  800 / 8000 [ 10%]  (Warmup)
## Chain 2: Iteration: 1600 / 8000 [ 20%]  (Warmup)
## Chain 2: Iteration: 2001 / 8000 [ 25%]  (Sampling)
## Chain 2: Iteration: 2800 / 8000 [ 35%]  (Sampling)
## Chain 2: Iteration: 3600 / 8000 [ 45%]  (Sampling)
## Chain 2: Iteration: 4400 / 8000 [ 55%]  (Sampling)
## Chain 2: Iteration: 5200 / 8000 [ 65%]  (Sampling)
## Chain 2: Iteration: 6000 / 8000 [ 75%]  (Sampling)
## Chain 2: Iteration: 6800 / 8000 [ 85%]  (Sampling)
## Chain 2: Iteration: 7600 / 8000 [ 95%]  (Sampling)
## Chain 2: Iteration: 8000 / 8000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 3.422 seconds (Warm-up)
## Chain 2:                7.116 seconds (Sampling)
## Chain 2:                10.538 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 8000 [  0%]  (Warmup)
## Chain 3: Iteration:  800 / 8000 [ 10%]  (Warmup)
## Chain 3: Iteration: 1600 / 8000 [ 20%]  (Warmup)
## Chain 3: Iteration: 2001 / 8000 [ 25%]  (Sampling)
## Chain 3: Iteration: 2800 / 8000 [ 35%]  (Sampling)
## Chain 3: Iteration: 3600 / 8000 [ 45%]  (Sampling)
## Chain 3: Iteration: 4400 / 8000 [ 55%]  (Sampling)
## Chain 3: Iteration: 5200 / 8000 [ 65%]  (Sampling)
## Chain 3: Iteration: 6000 / 8000 [ 75%]  (Sampling)
## Chain 3: Iteration: 6800 / 8000 [ 85%]  (Sampling)
## Chain 3: Iteration: 7600 / 8000 [ 95%]  (Sampling)
## Chain 3: Iteration: 8000 / 8000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 3.641 seconds (Warm-up)
## Chain 3:                3.233 seconds (Sampling)
## Chain 3:                6.874 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 1.2e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 8000 [  0%]  (Warmup)
## Chain 4: Iteration:  800 / 8000 [ 10%]  (Warmup)
## Chain 4: Iteration: 1600 / 8000 [ 20%]  (Warmup)
## Chain 4: Iteration: 2001 / 8000 [ 25%]  (Sampling)
## Chain 4: Iteration: 2800 / 8000 [ 35%]  (Sampling)
## Chain 4: Iteration: 3600 / 8000 [ 45%]  (Sampling)
## Chain 4: Iteration: 4400 / 8000 [ 55%]  (Sampling)
## Chain 4: Iteration: 5200 / 8000 [ 65%]  (Sampling)
## Chain 4: Iteration: 6000 / 8000 [ 75%]  (Sampling)
## Chain 4: Iteration: 6800 / 8000 [ 85%]  (Sampling)
## Chain 4: Iteration: 7600 / 8000 [ 95%]  (Sampling)
## Chain 4: Iteration: 8000 / 8000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 2.38 seconds (Warm-up)
## Chain 4:                4.399 seconds (Sampling)
## Chain 4:                6.779 seconds (Total)
## Chain 4:

6. Get summary of model

summary(model)
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: slgCort ~ sAge + breed + (1 | visit) 
##    Data: df (Number of observations: 73) 
##   Draws: 4 chains, each with iter = 8000; warmup = 2000; thin = 1;
##          total post-warmup draws = 24000
## 
## Multilevel Hyperparameters:
## ~visit (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.35      0.36     0.01     1.35 1.00     8558     9570
## 
## Regression Coefficients:
##            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept      0.00      0.31    -0.62     0.63 1.00     9511    11347
## sAge          -0.12      0.10    -0.31     0.08 1.00    14786    15585
## breedckcs      0.06      0.37    -0.71     0.75 1.00    15159    14304
## breedpug       0.03      0.38    -0.75     0.74 1.00    13219    13785
## breedret      -0.01      0.29    -0.58     0.54 1.00    12590    15997
## breedother    -0.00      0.28    -0.55     0.56 1.00    12057    14769
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     1.02      0.09     0.86     1.22 1.00    17057    16059
## alpha     4.03      1.30     1.86     6.92 1.00    15946    13703
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

7. MCMC diagnostics

a. check for convergence

plot(model)

Looking for hairy caterpillars

b. try a trank plot as well

mcmc_plot(model, type = 'rank_overlay')

8. Calculate 97% HPDI for age

Usually better than the compatibility intervals given in the summary

# Extract posterior draws and compute the 97% HPDI for the age slope.
# Bug fix: the original comment claimed the 1st column holds the age draws
# while the code indexed column 2 (column 1 is b_Intercept, column 2 is
# b_sAge per the summary). Select by name so the code cannot silently
# break if the model formula changes the column order.
draws <- as.matrix(model)
HPDI(draws[, "b_sAge"], 0.97)
##      |0.97      0.97| 
## -0.3336432  0.1003024

9. Calculate R2 for model

bayes_R2(model, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
##      Estimate  Est.Error       Q1.5        Q50     Q98.5
## R2 0.05997963 0.03154122 0.01089833 0.05498511 0.1445716
loo_R2(model, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
##     Estimate  Est.Error       Q1.5        Q50       Q98.5
## R2 -0.107032 0.04543035 -0.2204901 -0.1044811 -0.01885689

CHECKS ON MODEL

1. Basic check of simulations based on posterior distribution, versus the real data distribution

checks whether actual data is similar to simulated data.

pp_check(model, ndraws = 100) 

2. Check some individual draws versus observed using pp_check

par(mfrow = c(1,1))
pp_check(model, type = "hist", ndraws = 11, binwidth = 0.25) # separate histograms of 11 MCMC draws vs actual data

3. Other pp_check graphs

pp_check(model, type = "error_hist", ndraws = 11) # separate histograms of errors for 11 draws
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pp_check(model, type = "scatter_avg", ndraws = 100) # scatter plot

pp_check(model, type = "stat_2d") #  scatterplot of joint posteriors
## Using all posterior draws for ppc type 'stat_2d' by default.
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.

# PPC functions for predictive checks based on (approximate) leave-one-out (LOO) cross-validation
pp_check(model, type = "loo_pit_overlay", ndraws = 1000) 
## NOTE: The kernel density estimate assumes continuous observations and is not optimal for discrete observations.

5. Pairs plot

pairs(model)

PSIS LOO-CV to check model performance

loo_model <- loo(model, moment_match = TRUE)
loo_model
## 
## Computed from 24000 by 73 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -104.9  6.7
## p_loo         7.3  1.8
## looic       209.7 13.4
## ------
## MCSE of elpd_loo is 0.0.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.5, 1.3]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.

AUTOMATED PRIOR SENSITIVITY USING THE PRIOR SENSE PACKAGE

1. Sensitivity check

First, check the sensitivity of the prior and the likelihood to power-scaling, by comparing the posterior with the posteriors that result from power-scaling.

powerscale_sensitivity(model, variable = c("b_Intercept", "sigma", "b_sAge", "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret"), facet_rows = "variable")
## Sensitivity based on cjs_dist
## Prior selection: all priors
## Likelihood selection: all data
## 
##      variable prior likelihood diagnosis
##   b_Intercept 0.048      0.033         -
##         sigma 0.030      0.160         -
##        b_sAge 0.010      0.090         -
##   b_breedckcs 0.016      0.081         -
##  b_breedother 0.015      0.080         -
##    b_breedpug 0.017      0.076         -
##    b_breedret 0.015      0.077         -

2. Kernel density

powerscale_plot_dens(model, variable = c("b_Intercept", "sigma", "b_sAge", "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret"), facet_rows = "variable")

3. Empirical cumulative distribution functions

powerscale_plot_ecdf(model, variable = c("b_Intercept", "sigma", "b_sAge", "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret"), facet_rows = "variable")

4. Quantities

powerscale_plot_quantities(model, variable = c("b_Intercept", "sigma", "b_sAge", "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret"), facet_rows = "variable")

5. Check mean and sd of the modelled outcome to see if the issue can be identified

mean(model$data$slgCort)
## [1] -1.76419e-16
sd(model$data$slgCort)
## [1] 1

These values appear similar to what was set for the priors, so seems OK?

6. Now use bayestestR package to check priors are informative

check_prior(model, effects = "all")
##             Parameter Prior_Quality
## 1         b_Intercept   informative
## 2              b_sAge   informative
## 3         b_breedckcs   informative
## 4          b_breedpug   informative
## 5          b_breedret   informative
## 6        b_breedother   informative
## 7 sd_visit__Intercept   informative

CHECK PRIOR PREDICTION LINES FROM FINAL MODEL

1. Obtain draws of priors from final model

prior <- prior_draws(model)
prior %>% glimpse()
## Rows: 24,000
## Columns: 9
## $ Intercept    <dbl> -1.26261055, 1.01648870, 0.10310824, 0.85129812, -1.06815…
## $ b_sAge       <dbl> -0.009284525, 0.088291906, -0.051348348, 0.224020521, -0.…
## $ b_breedckcs  <dbl> 0.62103338, -1.98217481, -0.84186445, -1.87205937, 0.6158…
## $ b_breedpug   <dbl> -0.43052214, 0.16545563, 1.31342160, 0.65748021, 0.165988…
## $ b_breedret   <dbl> 0.69002551, -1.58100060, -1.17922231, 0.53124653, -1.3533…
## $ b_breedother <dbl> 1.31064513, -0.34889280, -0.88909081, 0.47875565, -0.8179…
## $ sigma        <dbl> 0.05763660, 0.53931457, 1.89768578, 0.58513351, 0.0430324…
## $ alpha        <dbl> 4.5922207, 3.6948566, -0.5571423, 5.1057437, 0.1058130, 2…
## $ sd_visit     <dbl> 0.61844675, 0.70935660, 0.13294819, 1.82120006, 0.3837163…

2. Plot prior prediction lines for sAge

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(-2, 2)) %>% 
  mutate(c = Intercept + b_sAge * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  labs(x = "Age (std)",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-2, 2)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

3. Priors for breed CKCS with line plot

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_breedckcs * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "CKCS Breed",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-3, 3)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

4. Priors for breed pug with line plot

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_breedpug * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "Pug breed",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-3, 3)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

5. Priors for breed ret with line plot

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_breedret * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "Retriever breed",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-3, 3)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

6. Priors for breed other with line plot

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_breedother * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "Other Breed",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-3, 3)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

CHECK PRIOR PREDICTIVE DISTRIBUTION

1. Prior Predictive Distribution

Can simulate data just on the priors. Fit model but only consider prior when fitting model. If this looks reasonable, it helps to confirm that your priors were reasonable

set.seed(666)
model_priors_only <- brm(slgCort ~ sAge + breed + (1 | visit),
                   family = skew_normal(),
                   prior = priors,
                   data = df,
                   sample_prior = "only")
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 7e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.7 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.021 seconds (Warm-up)
## Chain 1:                0.016 seconds (Sampling)
## Chain 1:                0.037 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 4e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.022 seconds (Warm-up)
## Chain 2:                0.016 seconds (Sampling)
## Chain 2:                0.038 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 4e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.02 seconds (Warm-up)
## Chain 3:                0.016 seconds (Sampling)
## Chain 3:                0.036 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 4e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.019 seconds (Warm-up)
## Chain 4:                0.019 seconds (Sampling)
## Chain 4:                0.038 seconds (Total)
## Chain 4:

2. Check predictions against priors

pp_check(model_priors_only, ndraws = 100)

VARIANCE-COVARIANCE MATRIX

as_draws_df(model) %>%
  select(b_Intercept:sigma) %>%
  cov() %>%
  round(digits = 3)
## Warning: Dropping 'draws_df' class as required metadata was removed.
##                     b_Intercept b_sAge b_breedckcs b_breedpug b_breedret
## b_Intercept               0.097 -0.008      -0.040     -0.047     -0.043
## b_sAge                   -0.008  0.010       0.005      0.013      0.006
## b_breedckcs              -0.040  0.005       0.136      0.042      0.040
## b_breedpug               -0.047  0.013       0.042      0.145      0.044
## b_breedret               -0.043  0.006       0.040      0.044      0.082
## b_breedother             -0.047  0.010       0.039      0.049      0.043
## sd_visit__Intercept       0.002  0.001      -0.003      0.000     -0.001
## sigma                     0.004  0.000       0.001      0.000      0.001
##                     b_breedother sd_visit__Intercept sigma
## b_Intercept               -0.047               0.002 0.004
## b_sAge                     0.010               0.001 0.000
## b_breedckcs                0.039              -0.003 0.001
## b_breedpug                 0.049               0.000 0.000
## b_breedret                 0.043              -0.001 0.001
## b_breedother               0.079               0.001 0.000
## sd_visit__Intercept        0.001               0.132 0.001
## sigma                      0.000               0.001 0.009

MANUAL POSTERIOR PREDICTIVE DISTRIBUTION CHECKS

NB Uses posterior_predict

1. Posterior predictive distribution plots for a continuous predictor variable

par(mfrow = c(1,4))
# plot the observed data
plot(slgCort ~ sAge, main = "Observed", col = "steelblue",
     data = df, ylim = c(-3, 3)) # This is the observed data

# use posterior predict to simulate predictions
ppd <- posterior_predict(model) 
dim(ppd)
## [1] 24000    73
# Plot 3 simulations of the data
plot(ppd[50,] ~ df$sAge, main = "Simulated", 
     ylim = c(-3,3), col = "firebrick") 
plot(ppd[51,] ~ df$sAge, main = "Simulated", 
     ylim = c(-3,3), col = "firebrick") 
plot(ppd[52,] ~ df$sAge, main = "Simulated", 
     ylim = c(-3,3), col = "firebrick")

2. Posterior predictive distribution plots for a categorical predictor variable with small group size

# Compare the observed breed-wise distribution of slgCort against three
# randomly chosen posterior predictive draws. With small group sizes, look
# for simulated spread/location grossly inconsistent with the observed panel.
par(mfrow = c(2,2))
stripchart(slgCort ~ breed, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20, main = "Observed")
# each call below selects one posterior predictive draw (one row of ppd)
stripchart(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ breed, vertical = TRUE, method = "jitter",
           col = "firebrick", data = df, pch = 20, main = "PPD")
stripchart(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ breed, vertical = TRUE, method = "jitter",
           col = "firebrick", data = df, pch = 20, main = "PPD")
stripchart(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ breed, vertical = TRUE, method = "jitter",
           col = "firebrick", data = df, pch = 20, main = "PPD")

ANALYSING THE POSTERIOR DISTRIBUTION

1. Plot conditional effects from model

plot(conditional_effects(model), ask = FALSE)

b. format and plot conditional effects for age

ce <- conditional_effects(model, "sAge")
plot(ce, plot = FALSE)[[1]] +
         theme_bw() +
         labs(title = "Conditional effect of age on hair cortisol") +
         labs(y = paste0("Log hair cortisol (standardised)")) +
         labs(x = paste0("Age (standardised)")) +
         theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5),
               axis.text.x = element_text(color = "grey50", size = 12),
               axis.text.y = element_text(color = "grey50", size = 10),
               legend.position = "inside", legend.position.inside = c(0.93, 0.80))

2. mcmc_plot of model

a.just parameters of beta variables

mcmc_plot(model,
          variable = c(
         "b_sAge",
         "b_breedckcs",
         "b_breedpug",
         "b_breedret",
         "b_breedother"))

age vs prior
1. distributional
mcmc_plot(model,
          variable = c("b_sAge", "prior_b_sAge"))

2. density
mcmc_plot(model,
          variable = c("b_sAge", "prior_b_sAge"),
          type = "areas") +

   theme_classic() +
    labs(title = "Prior vs posterior distribution for age effect") +
         labs(y = "") +
         labs(x = paste0("Possible parameter values")) +
    scale_y_discrete(labels=c("prior_b_sAge" = "Prior", "b_sAge" = "Posterior"),
                     limits = c("prior_b_sAge", "b_sAge")) +
         theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5),
               axis.text.x = element_text(color = "grey50", size = 12),
               axis.text.y = element_text(color = "grey8",size = 12))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

b. all parameters except nu

mcmc_plot(model, variable = c(
         "b_Intercept",
         "sigma",
         "b_sAge",
         "b_breedckcs",
         "b_breedpug",
         "b_breedret",
         "b_breedother"))

3. Plot all posterior distributions

posterior <- as.matrix(model)
mcmc_areas(posterior,
pars = c("b_Intercept", "sigma",
         "b_sAge",
         "b_breedckcs",
         "b_breedpug",
         "b_breedret",
         "b_breedother"),
# arbitrary threshold for shading probability mass
prob = 0.75)

4. plot posterior distribution for for betas

posterior <- as.matrix(model)
mcmc_areas(posterior,
    pars = c("b_sAge",
            "b_breedckcs",
            "b_breedpug",
            "b_breedret",
            "b_breedother"),
    prob = 0.75) # arbitrary threshold for shading probability mass

Plot posterior distribution for age

# Posterior density for the age coefficient only, with 97% of the
# probability mass shaded.
posterior <- as.matrix(model)
mcmc_areas(posterior,
pars = "b_sAge",
# arbitrary threshold for shading probability mass
prob = 0.97) +
  
   theme_classic() +
     labs(title = "Posterior distribution for age effect", 
         y = "Density distribution", 
         x = "Possible parameter values") +
    # BUG FIX: `labels = ("b_sAge" = "")` is a syntax error in R (`=` inside
    # grouping parentheses is not a named vector); a named character vector
    # built with c() is required to blank the axis label for b_sAge.
    scale_y_discrete(labels = c("b_sAge" = "")) +
         theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5),
               axis.text.x = element_text(color = "grey50", size = 12),
               axis.text.y = element_text(color = "grey8",size = 12))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

5. Describe the posterior visually

# Focus on describing posterior
hdi_range <- hdi(model, ci = c(0.65, 0.70, 0.80, 0.89, 0.95))
plot(hdi_range, show_intercept = T)

just sAge

# Focus on describing posterior
hdi_range <- hdi(model, ci = c(0.65, 0.70, 0.80, 0.89, 0.95), parameters = "sAge")
plot(hdi_range, show_intercept = T) +

    labs(title = "Posterior distribution for age effect") +
         labs(y = "Density distribution") +
         labs(x = "Possible parameter values") +
           theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5),
               axis.text.x = element_text(color = "grey50", size = 12),
               axis.text.y = element_text(color = "grey8",size = 12))

6. Posterior prediction for effect of sAge on log cortisol (when visit = v0, breed = mix)

# determine the range of `a` values we'd like to feed into `fitted()`
nd <- tibble(sAge = seq(from = -3.2, to = 3.2, length.out = 30), visit = "v0", breed = "mix")

# now use `fitted()` to get the model-implied trajectories
fitted(model,
       newdata = nd) %>% 
  data.frame() %>% 
  bind_cols(nd) %>% 
  
  # plot
  ggplot(aes(x = sAge)) +
  geom_smooth(aes(y = Estimate, ymin = Q2.5, ymax = Q97.5),
              stat = "identity",
              fill = "#F8766D", color = "#F8766D", alpha = 1/5, linewidth = 1) +
  geom_point(data = df, 
             aes(y = slgCort), 
             size = 2, color = "#F8766D") +
  labs(x = "Age (standardised)",
       y = "Log hair cortisol (standardised)") +
  coord_cartesian(xlim = range(df$sAge), 
                  ylim = range(df$slgCort)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

HYPOTHESIS TESTS

1. Hypothesis test to check if the association between cortisol and age (b_sAge, column 2 of the draws) is >0

draws <- as.matrix(model)
mean(draws[,2] >0)
## [1] 0.1186667
mean(draws[,2] <0)
## [1] 0.8813333

2. Check 97% credible interval with HPDI for the age coefficient (b_sAge) from draws

HPDI(draws[,2], prob=0.97)
##      |0.97      0.97| 
## -0.3336432  0.1003024

3. Visualising the posterior of a model using numerical and graphical methods

a. Basic (one dog only)

# create new dataframe which contains results of the first dog
new_data <- rbind(df[1,], df[1,], df[1,], df[1,], df[1,])
# Now change one category to be different
new_data$sAge <- c(-2, -1, 0, 1, 2)
# Visualise df to make sure it has worked
new_data
##   number   group visit season breed_group coat_colour  sex age comorbidity
## 1     c1 stopped    v0 winter         ret        dark Male  43         yes
## 2     c1 stopped    v0 winter         ret        dark Male  43         yes
## 3     c1 stopped    v0 winter         ret        dark Male  43         yes
## 4     c1 stopped    v0 winter         ret        dark Male  43         yes
## 5     c1 stopped    v0 winter         ret        dark Male  43         yes
##   fat_percent cortisol   lgCort breed     sFat sAge   slgCort
## 1    52.21393  4.92422 1.594166   ret 1.500213   -2 0.3415375
## 2    52.21393  4.92422 1.594166   ret 1.500213   -1 0.3415375
## 3    52.21393  4.92422 1.594166   ret 1.500213    0 0.3415375
## 4    52.21393  4.92422 1.594166   ret 1.500213    1 0.3415375
## 5    52.21393  4.92422 1.594166   ret 1.500213    2 0.3415375
# Now get mean predictions from the draws of the model
pred_means <- posterior_predict(model, newdata = new_data)

# Compare difference in means between the two categories (-2 vs 0)
difference <- pred_means[,1] - pred_means[,3]
# Visualise
head(difference)
## [1]  0.7890468  1.7136939  2.6446566 -1.2212176  0.1610320 -0.5998717
# Examine mean of difference
mean(difference)
## [1] 0.2415734
# View histogram of this
hist(difference)

# Create HPDI
HPDI(difference, 0.97)
##     |0.97     0.97| 
## -3.111882  3.475269
# Compare difference in means between the two categories (-1 vs 0)
difference <- pred_means[,2] - pred_means[,3]
# Visualise
head(difference)
## [1] -0.7748153  0.1780925  0.6708876 -1.7862811 -0.3914311 -1.0592739
# Examine mean of difference
mean(difference)
## [1] 0.1105525
# View histogram of this
hist(difference)

# Create HPDI
HPDI(difference, 0.97)
##     |0.97     0.97| 
## -3.246402  3.295749
# Compare difference in means between the two categories (1 vs 0)
difference <- pred_means[,4] - pred_means[,3]
# Visualise
head(difference)
## [1] -0.8323817  2.1121423  1.8819575 -2.0940779 -0.1748980 -0.1650800
# Examine mean of difference
mean(difference)
## [1] -0.1127996
# View histogram of this
hist(difference)

# Create HPDI
HPDI(difference, 0.97)
##     |0.97     0.97| 
## -3.506415  3.102526
# Compare difference in means between the two categories (2 vs 0)
difference <- pred_means[,5] - pred_means[,3]
# Visualise
head(difference)
## [1] -1.0363067  0.3840559 -0.4776381 -2.5410849 -0.1238849  0.6196964
# Examine mean of difference
mean(difference)
## [1] -0.2354072
# View histogram of this
hist(difference)

# Create HPDI
HPDI(difference, 0.97)
##     |0.97     0.97| 
## -3.530306  3.045350

b. Advanced (all dogs)

# Counterfactual datasets: copy the observed data for all dogs, then set the
# standardised age to -1 (new_data1) and +1 (new_data2) so the predicted
# difference isolates a two-SD change in age.
new_data1 <- df
# BUG FIX: was `new_data1$Age <- c(-1)`. The model formula uses `sAge`, not
# `Age`, so assigning `Age` created an unused column and left the original
# ages in place — pred_nd1 was not the intended age = -1 counterfactual.
new_data1$sAge <- -1

# copy the observed data for all dogs (not just the first dog)
new_data2 <- df
# set standardised age to +1 for every dog
new_data2$sAge <- 1

# Now get predictions from the draws of the models
pred_nd1 <- posterior_predict(model, newdata = new_data1)
pred_nd2 <- posterior_predict(model, newdata = new_data2)
pred_diff <- pred_nd1 - pred_nd2
pred_diff <- data.frame(pred_diff)

# Create mean of differences for each column (dog) of the dataframe
pred_diff_mean <- apply(pred_diff, 2, mean)
# View histogram of mean differences
hist(pred_diff_mean)

# mean difference
mean(pred_diff_mean)
## [1] 0.1133225
# Create HPDI
HPDI(pred_diff_mean, 0.97)
##      |0.97      0.97| 
## -0.1784570  0.3212036

4. Make predictions of log cortisol for each dog and compare with actual data

pred_slgCort <- posterior_epred(model)
av_pred_slgCort <- colMeans(pred_slgCort)
plot(av_pred_slgCort ~ df$slgCort)

1. Plot the predicted effect of age on log hair cortisol

set.seed(666)
# Prediction grid: seven standardised ages from -3 to 3 for hypothetical new
# dogs at the baseline visit and mix breed.
# BUG FIX: `visit` must match the values used in the data and elsewhere in
# this script (e.g. "v0" at the fitted() grid above), not the number 0;
# a numeric 0 would be a new, meaningless level for the grouping factor.
nd1 <- tibble(sAge = seq(-3, 3, by = 1), visit = "v0",
              breed = "mix",
              case_number = c("dog1", "dog2", "dog3","dog4", "dog5", "dog6", "dog7"))

p1 <-
  predict(model,
          resp = "slgCort",
          allow_new_levels = TRUE,
          newdata = nd1) %>% 
  data.frame() %>% 
  bind_cols(nd1) %>% 
  
  ggplot(aes(x = sAge, y = Estimate, ymin = Q2.5, ymax = Q97.5)) +
  
  geom_linerange(aes(ymin = Estimate - Est.Error, ymax = Estimate + Est.Error),
                 linewidth = 1, color = "#F8766D", alpha = 3/5) +
  geom_point(size = 5, color = "#F8766D") +
  theme_bw() +
  labs(title = "Predicted effect of age on hair cortisol",
       x = "Age (standardised)",
       y = "Log hair cortisol (standardised)") +
  theme(axis.title.y = element_text(size=12, face="bold"), 
        axis.title.x = element_text(size=12, face="bold"),
        title = element_text(size=12, face="bold"),
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim = c(-2.5, 2.5)) +
  scale_x_continuous(breaks=seq(-3, 3 , 1))

plot(p1)